Subgrad

计算逐元素减法(Sub)操作的梯度。该算子是 Sub 算子的反向传播(backward pass)部分,支持广播。

\[\text{dx1} = \frac{\partial L}{\partial X1} = \frac{\partial L}{\partial Y} \times 1 = \frac{\partial L}{\partial Y}\]
\[\text{dx2} = \frac{\partial L}{\partial X2} = \frac{\partial L}{\partial Y} \times (-1) = -\frac{\partial L}{\partial Y}\]

其中,前向操作为 \(Y = X1 - X2\);dy 是来自后一层的上游梯度(即 \(\frac{\partial L}{\partial Y}\)),dx1 和 dx2 分别是对 X1 和 X2 的梯度。

输入:
  • dy - 上游梯度数据地址(即 \(\frac{\partial L}{\partial Y}\))。

  • params - 参数打包成数组:
    • dx1_dims - 前向传播时第一个输入 x1 的维度信息数组(int*)。

    • dx2_dims - 前向传播时第二个输入 x2 的维度信息数组(int*)。

    • dy_dims - 上游梯度 dy 的维度信息数组(int*)。

    • num_dims - 维度数(int)。

    • temp_space - 临时空间。

  • core_mask - 核掩码(int),仅共享存储版本需要。

输出:
  • dx1 - 对 x1 的梯度数据地址。

  • dx2 - 对 x2 的梯度数据地址。

支持平台:

FT78NE MT7004

备注

  • MT7004 支持fp16, fp32

  • FT78NE 支持fp32

  • 当输入张量被广播时,算子会自动处理广播维度的梯度累加

共享存储版本:

void hp_sub_grad_s(half *dy, half *dx1, half *dx2, long long *params, int core_mask)
void fp_sub_grad_s(float *dy, float *dx1, float *dx2, long long *params, int core_mask)

C调用示例:

 1//MT7004示例
 2#include <stdio.h>
 3#include <stdlib.h>
 4#include <subgrad.h>
 5int main(int argc, char* argv[]) {
 6    float *dy = (float *)0x81000000;
 7    float *dx1 = (float *)0x82000000;
 8    float *dx2 = (float *)0x83000000;
 9    float *checkdx1 = (float *)0x84000000;
10    float *checkdx2 = (float *)0x85000000;
11    int *tempsapce = (int *)0x86000000;
12
13    srand(0);  // fixed seed so the generated test data is reproducible
14    int i;
15
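16    // broadcast case: x2 has extent 1 in dim 0 and is broadcast against x1
17    int dx1_dims[] = {16, 16, 64};  // shape of x1 (and of dx1)
18    int dx2_dims[] = {1, 16, 64};   // shape of x2 (and of dx2)
19    int dy_dims[] = {16, 16, 64};   // shape of dy = broadcast(x1, x2)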
20    int num_dims = 3;
21
22    int dx1_num = get_total_elements(num_dims, dx1_dims);
23    int dx2_num = get_total_elements(num_dims, dx2_dims);
24    int dy_num  = get_total_elements(num_dims, dy_dims);
25
26
27    for (i = 0; i < dy_num; ++i) {
28        dy[i] = (float)(rand() % 100) / 10.0f;
29    }
30
31    long long params[17];
32    params[0] = (unsigned long long)dx1_dims;
33    params[1] = (unsigned long long)dx2_dims;
34    params[2] = (unsigned long long)dy_dims;
35    params[3] = (unsigned long long)num_dims;
36    params[4] = (unsigned long long)tempsapce;
37    int core_mask = 0x0f;
38    fp_sub_grad_s(dy, dx1, dx2, params, core_mask);
39    return 0;
40}

私有存储版本:

void hp_sub_grad_p(half *dy, half *dx1, half *dx2, long long *params)
void fp_sub_grad_p(float *dy, float *dx1, float *dx2, long long *params)

C调用示例:

 1//MT7004示例
 2#include <stdio.h>
 3#include <stdlib.h>
 4#include <subgrad.h>
 5int main(int argc, char* argv[]) {
 6    float *dy = (float *)0x10010000;
 7    float *dx1 = (float *)0x10020000;
 8    float *dx2 = (float *)0x10030000;
 9    float *checkdx1 = (float *)0x10040000;
10    float *checkdx2 = (float *)0x10050000;
11    int *tempsapce = (int *)0x10060000;
12
13    srand(0);  // fixed seed so the generated test data is reproducible
14    int i;
15
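16    // broadcast case: x2 has extent 1 in dim 0 and is broadcast against x1
17    int dx1_dims[] = {16, 16, 64};  // shape of x1 (and of dx1)
18    int dx2_dims[] = {1, 16, 64};   // shape of x2 (and of dx2)
19    int dy_dims[] = {16, 16, 64};   // shape of dy = broadcast(x1, x2)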
20    int num_dims = 3;
21
22    int dx1_num = get_total_elements(num_dims, dx1_dims);
23    int dx2_num = get_total_elements(num_dims, dx2_dims);
24    int dy_num  = get_total_elements(num_dims, dy_dims);
25
26
27    for (i = 0; i < dy_num; ++i) {
28        dy[i] = (float)(rand() % 100) / 10.0f;
29    }
30
31    long long params[17];
32    params[0] = (unsigned long long)dx1_dims;
33    params[1] = (unsigned long long)dx2_dims;
34    params[2] = (unsigned long long)dy_dims;
35    params[3] = (unsigned long long)num_dims;
36    params[4] = (unsigned long long)tempsapce;
37
38    fp_sub_grad_p(dy, dx1, dx2, params);
39    return 0;
40}